Packages

pacman::p_load(
  dplyr,
  tidyverse,
  readxl,
  vegan,
  reshape2,
  RColorBrewer,
  data.table,
  grid,
  ggh4x,
  ggsci)
# setwd("D:/PhD/01_Data/01_Baseline/Model_1") # Windows
# setwd("/Volumes/Yiming_Wang/PhD/01_Data/01_Baseline/Model_1/Tissue") # Mac

setwd("/Volumes/Yiming_Wang/PhD/15_Manuscript/Submission/07_ISME/Review comments/Revision data/26 April 2024")#Yiming's imac pro

df_all <- read_excel("Baseline_S47_Tissue.xlsx") %>% 
  filter(SampleType != "Faeces") %>% 
  mutate(GenotypeSex_Type = paste(GenotypeSex, SampleType, sep="_")) %>% 
  mutate(Genotype=factor(Genotype,levels=c("WT","KO"))) %>% 
  mutate(Sex=factor(Sex,levels=c("Male"))) %>% 
  mutate(GenotypeSex = factor(GenotypeSex,levels = c("WT Male","KO Male"))) %>% 
  mutate(GenotypeSex_Type = factor(GenotypeSex_Type, 
                                   levels = c("WT Male_ST1_T","WT Male_ST2_T","WT Male_LT","WT Male_Faeces","KO Male_ST1_T","KO Male_ST2_T","KO Male_LT","KO Male_Faeces")))%>% 
  mutate(Sample=row_number()) %>% 
  relocate(Sample,GenotypeSex_Type)

df_all$Sample <- sprintf("%02d", as.numeric(df_all$Sample)) 

# For future facet_grid()
df_all<- df_all %>% 
  mutate(Sample = factor(paste0("Sample", " ",df_all$Sample))) %>% 
  mutate(FigureID =row_number()) %>% 
  relocate(FigureID) %>% 
  mutate(FigureID=ifelse(FigureID %in% c(1,2,3), 1,
                         ifelse(FigureID %in% c(4,5,6),2,
                                ifelse(FigureID %in% c(7,8,9),3,
                                       ifelse(FigureID %in% c(10,11,12),4,
                                                     ifelse(FigureID %in% c(13,14,15),5,
                                                            ifelse(FigureID %in% c(16,17,18),1,
                                                                   ifelse(FigureID %in% c(19,20,21),2,
                                                                          ifelse(FigureID %in% c(22,23,24),3,
                                                                                 ifelse(FigureID %in% c(25,26,27),4,5))))))))))



# Subgroup by variables
#30436d WT Male
#9ea5c2 WT Female
#654e3a KO Male
#bcaba6 KO Female

#3C5488B2 WT
#7E6148B2 KO
#D98594FF Female
#94C5CCFF Male

Shape data

Define Core taxa

df_presence <- df_all %>%  
  gather(Taxa, Abundance, -c(1:9)) %>% 
  select(1,2,3,4,5,8,9,10,11) %>% 
  filter(Abundance != 0) %>% # save the taxa of which abundance is not equal to 0
  group_by(Taxa) %>% 
  summarise(n=n_distinct(SequenceID)) %>% #get number of samples that have non-zero abundance bacteria as grouped by bacteria# 
  mutate(n.sample=nrow(df_all)) %>% 
  mutate(p=n/n.sample)

# Sample size is small, variance is large and three different tissue types has different composition, thus the presence criteria was set as 0.5 than 0.95
df_presence_core <- df_presence %>% 
  filter (p >= 0.6)

df_abundance <- df_all %>%
  gather(Taxa, Abundance, -c(1:9)) %>% # transform to long table
  select(1,2,3,4,5,8,9,10,11) %>%
  group_by(Taxa) %>% 
  summarise(mean_abundance=mean(Abundance,na.rm=T),
            median_abundance = median(Abundance,na.rm=T))

df_abundance_core <- df_abundance %>%
  filter(mean_abundance>0.0001) 

df_all_core <- left_join(df_presence_core, df_abundance_core, by="Taxa")

df_for_figure <- df_all %>% 
  gather(Taxa, Abundance, -c(1:9)) %>% 
  mutate(Taxa = ifelse(Taxa %in% df_all_core$Taxa, Taxa, "Non-core taxa")) %>% 
  group_by(FigureID, Sample, SequenceID, Genotype, Sex, GenotypeSex, Taxa, SampleType, GenotypeSex_Type) %>%
  summarise(Abundance = sum(Abundance)) %>% 
  spread (Taxa, Abundance)

Melt the dataframe

#Create dataset with just headings and all taxa data
all_taxa1 <- df_for_figure %>% subset(select = c(2,9:ncol(df_for_figure))) 


#Create dataset with just headings and all metadata
all_taxa1_names <- df_for_figure[,1:8]


#"Melt" data so it becomes a long list of each cell
all_taxa2<- melt(all_taxa1, id = c("Sample"))
all_taxa2$Mice <- factor(all_taxa2$Sample, levels=unique(all_taxa2$Sample)) #factor the ID column

#Add metadata to melt 
all_taxa2_names <- left_join(all_taxa2, all_taxa1_names, by="Sample")

Figure

Sort taxa and Define color

#Generate bar plot with separate lengend
# Choose the most abundant taxa as the first bar and decedent the samples
order<-all_taxa2_names%>%
  filter(variable == "Lactobacillus")%>%
  arrange(desc(value))

# Factor your oder
order$Sample<-factor(order$Sample,levels=unique(order$Sample))

# Apply the factor level to your original data
all_taxa2_names$Sample<-factor(all_taxa2_names$Sample,levels = order$Sample)

# Define color
mypal = pal_simpsons("springfield", alpha = 0.7)(14)
mypal
##  [1] "#FED439B2" "#709AE1B2" "#8A9197B2" "#D2AF81B2" "#FD7446B2" "#D5E4A2B2"
##  [7] "#197EC0B2" "#F05C3BB2" "#46732EB2" "#71D0F5B2" "#370335B2" "#075149B2"
## [13] "#C80813B2" "#91331FB2"
library("scales")
show_col(mypal)

set.seed(860) #860
mypal<-sample(mypal)
show_col(mypal)

Bar plot

# Draw bar plot
figure <- ggplot(all_taxa2_names, aes(x = FigureID, fill = reorder(variable,-value), y = value))+ #reorder taxa based on their relative abundance
  geom_bar(position="fill", stat="identity", linetype = 1, width = 0.7, colour = 'black',size = 0.2)+ #can replace colour = "black" with linetype = 0 for no outline
  theme(axis.title = element_text(face="plain", size = 10),
        axis.title.x=element_text(size =10),
        axis.title.y=element_text(size =20, margin = margin(r = 5)),
        axis.text.x=element_blank(), 
        axis.text.y=element_text(colour="black", face="plain", size=15),
        axis.ticks.x=element_blank(),
        plot.title = element_text(size=10, hjust=0.5, face="plain"),
        legend.position = "right",
        plot.background = element_rect(fill="transparent", colour = "transparent"))+
  guides(fill=guide_legend(ncol=1))+
  scale_y_continuous(expand = c(0,0),labels=scales::percent) + 
  labs(x = "", y = "Relative Abundance (%)", fill = "OTU")+
  scale_fill_manual(values = c(mypal)) + 
  facet_grid(GenotypeSex~SampleType,scales = "fixed") + theme(strip.text = element_text(colour = 'white', size = rel(2)))

figure

Legend

## Three columns
all_bar_legend_set <- ggplot(all_taxa2_names, aes(x = Sample, fill = reorder(variable,-value), y = value))+
  geom_bar(position="fill", stat="identity", linetype = 1, width = 0.8, color = "black", size = 0.3)+
  theme(legend.title = element_text(size = 8, face = "bold"), legend.text = element_text(size = 6, face = "bold", colour = "black"), legend.key.size = unit(0.2, "cm"))+
  scale_fill_manual(values = c(mypal))+
  guides(fill=guide_legend(ncol=3))


all_bar_legend <- cowplot::get_legend(all_bar_legend_set)
grid.newpage()
grid.draw(all_bar_legend)

## Two columns
draw_key_polygon3 <- function(data, params, size) {
  lwd <- min(data$size, min(size) / 4)
  
  grid::rectGrob(
    width = grid::unit(0.6, "npc"),
    height = grid::unit(0.6, "npc"),
    gp = grid::gpar(
      col = data$colour,
      fill = alpha(data$fill, data$alpha),
      lty = data$linetype,
      lwd = lwd * .pt,
      linejoin = "mitre"
    ))
}

# register new key drawing function, 
# the effect is global & persistent throughout the R session
GeomBar$draw_key = draw_key_polygon3

all_bar_legend_set <- ggplot(all_taxa2_names, aes(x = Mice, fill = reorder(variable,-value), y = value))+
  geom_bar(position="fill", stat="identity", linetype = 1, width = 0.8, color = "black", size = 0.3)+
  theme(legend.title = element_text(size = 8, face = "bold"), 
        legend.text = element_text(size = 12, face = "plain", colour = "black"), 
        legend.spacing.y = unit(1, 'cm'),
        legend.key = element_rect(color = NA, fill = NA),
        legend.key.size = unit(0.6, "cm"))+
  scale_fill_manual(values = c(mypal))+
  guides(fill=guide_legend(ncol=2,override.aes = list(colour = "black", size = 0.25)))
all_bar_legend_set 

all_bar_legend <- cowplot::get_legend(all_bar_legend_set)
grid.newpage()
grid.draw(all_bar_legend)